"""
安全客爬虫
"""
from library import library as clib
from crawler import baseScrawler
from datetime import datetime
import json


# Module-level shared connection/session object (reused across all requests
# made by this crawler — presumably wraps requests-session logic; defined in
# the project-local `library` module, so details are not visible here).
connect = clib.Connect()


class anquanke(baseScrawler.baseScrawler):
    """Crawler for anquanke.com ("安全客") security articles.

    Pages through the site's JSON API and collects articles strictly newer
    than ``start_date`` into ``self.final_all``; ``start()`` then stores the
    top ``num`` results (by page views) in ``self.res``.
    """

    def __init__(self, start_date, num):
        """Initialize with the cutoff date (datetime) and result count.

        ``start_date``/``num`` handling is delegated to the base class.
        """
        super().__init__(start_date, num)
        self.mainUrl = "https://www.anquanke.com"   # site base, for article/author links
        self.pageUrl = "https://api.anquanke.com"   # API base, for paginated listings

    def json_parse(self, json_text):
        """Parse one API page and append new articles to ``self.final_all``.

        For each article a dict with link/title/author/classification/time/
        page_view keys is built. The listing is date-ordered: once an article
        dated on or before ``self.start_date`` is seen, ``self.mark`` is set
        to 1 and parsing stops (signals ``start()`` to end the crawl).
        Duplicate articles are skipped via the base class's ``unique()``.
        """
        res_dict = json.loads(json_text)
        article_list = res_dict['data']

        for item in article_list:
            dic = {}
            dic['article_link'] = self.mainUrl + "/post/id/" + str(item['id'])
            dic['article_title'] = item['title']
            dic['author_link'] = self.mainUrl + "/member.html?memberId=" + str(item['author']['id'])
            dic['author_name'] = item['author']['nickname']
            # Robustness fix: the original `item['tags'][0]` raised IndexError
            # for articles with an empty/missing tag list.
            tags = item.get('tags') or []
            dic['classification'] = tags[0] if tags else ''
            dic['time'] = item['date'][:10]   # keep only the YYYY-MM-DD prefix
            dic['page_view'] = item['pv']

            article_date = datetime.strptime(dic['time'], "%Y-%m-%d")
            if article_date <= self.start_date:
                # Reached the date cutoff — stop collecting and tell start()
                # to stop fetching further pages.
                self.mark = 1
                break
            elif self.unique(dic):
                self.final_all.append(dic)
                self.allurl.append(dic.get("author_link"))

    def start(self):
        """Crawl API pages until the date cutoff (or a failed request).

        On completion the filtered result (top ``self.num`` articles by page
        views) is stored in ``self.res``. Failed pages abort the crawl: the
        original code only printed the failure and kept incrementing ``page``,
        which looped forever when the API failed persistently, since
        ``self.mark`` was never set on that path.
        """
        page = 0

        while self.mark == 0:
            page = page + 1
            currentUrl = self.pageUrl + ("/data/v1/posts?size=50&page=%d&category=knowledge" % (page))
            ures = connect.request_with_no_flag(currentUrl).text.encode('utf-8')
            ures = bytes.decode(ures)

            # '4ny0neSec_wrong' is the project's sentinel for a failed request.
            if ures.find('4ny0neSec_wrong') == -1:
                self.json_parse(ures)
            else:
                # TODO: record failed URLs in a log file instead of printing.
                print("-----------------" + currentUrl + " fail-----------------")
                # Bug fix: terminate instead of retrying the next page forever.
                break
        self.res = self.filter.views_first(self.final_all, self.num)



